In this project, a multivariate time series data which is collected by a single three-axis accelerometer is manipulated and visualized. The data belongs to the gesture patterns of eight different users (will be mentioned as “class” from now on) and consists of over 4000 instances. The data for X, Y, and Z axes is provided separately in the following link.

The primary aim is to visualize an instance from each class after manipulating the data properly. Then, two alternative time series representation methods will be applied and final plots will be compared.

Data Manipulation

library(data.table)
library(ggplot2)

# The data is imported

x <- fread("C:/Users/AHMET/Desktop/uWaveGestureLibrary_X_TRAIN")
y <- fread("C:/Users/AHMET/Desktop/uWaveGestureLibrary_Y_TRAIN")
z <- fread("C:/Users/AHMET/Desktop/uWaveGestureLibrary_Z_TRAIN")

# ID variable is added to each axis data

x[,id:=1:.N]
y[,id:=1:.N]
z[,id:=1:.N]

# Class information is added

setnames(x,'V1','Class')
setnames(y,'V1','Class')
setnames(z,'V1','Class')

# Each data table is melted for better visual representation

long_train_x <- melt(x,id.vars=c('id','Class'))
long_train_x[,time:=as.numeric(gsub("\\D", "", variable))-1]
long_train_x <- long_train_x[,list(id,Class,time,value)]
long_train_x <- long_train_x[order(Class,id,time)]
setnames(long_train_x,'value','AccelerationX')
head(long_train_x)
##    id Class time AccelerationX
## 1: 11     1    1    -0.7914472
## 2: 11     1    2    -0.7914472
## 3: 11     1    3    -0.7958727
## 4: 11     1    4    -0.8100650
## 5: 11     1    5    -0.8492300
## 6: 11     1    6    -0.9034648
long_train_y <- melt(y,id.vars=c('id','Class'))
long_train_y[,time:=as.numeric(gsub("\\D", "", variable))-1]
long_train_y <- long_train_y[,list(id,Class,time,value)]
long_train_y <- long_train_y[order(Class,id,time)]
setnames(long_train_y,'value','AccelerationY')
head(long_train_y)
##    id Class time AccelerationY
## 1: 11     1    1     -1.959984
## 2: 11     1    2     -1.959984
## 3: 11     1    3     -1.956600
## 4: 11     1    4     -1.945750
## 5: 11     1    5     -1.915809
## 6: 11     1    6     -1.874347
long_train_z <- melt(z,id.vars=c('id','Class'))
long_train_z[,time:=as.numeric(gsub("\\D", "", variable))-1]
long_train_z <- long_train_z[,list(id,Class,time,value)]
long_train_z <- long_train_z[order(Class,id,time)]
setnames(long_train_z,'value','AccelerationZ')
head(long_train_z)
##    id Class time AccelerationZ
## 1: 11     1    1    -0.2490781
## 2: 11     1    2    -0.2490781
## 3: 11     1    3    -0.2514390
## 4: 11     1    4    -0.2590101
## 5: 11     1    5    -0.2799033
## 6: 11     1    6    -0.3088358

Finding Speed and Location Data

In this part, to be able to visualize the gesture recognition instances, location data for each axis will be gathered by taking cumulative sums of the speed data. The speed data for each axis will be gathered by the same way, that is, by taking cumulative sums of the acceleration data.

table_xyz <- copy(long_train_x)
table_xyz[,AccelerationY:=long_train_y$AccelerationY]
table_xyz[,AccelerationZ:=long_train_z$AccelerationZ]
table_xyz[,SpeedX:=cumsum(table_xyz$AccelerationX)]
table_xyz[,SpeedY:=cumsum(table_xyz$AccelerationY)]
table_xyz[,SpeedZ:=cumsum(table_xyz$AccelerationZ)]
table_xyz[,LocationX:=cumsum(table_xyz$SpeedX)]
table_xyz[,LocationY:=cumsum(table_xyz$SpeedY)]
table_xyz[,LocationZ:=cumsum(table_xyz$SpeedZ)]
head(table_xyz)
##    id Class time AccelerationX AccelerationY AccelerationZ     SpeedX
## 1: 11     1    1    -0.7914472     -1.959984    -0.2490781 -0.7914472
## 2: 11     1    2    -0.7914472     -1.959984    -0.2490781 -1.5828944
## 3: 11     1    3    -0.7958727     -1.956600    -0.2514390 -2.3787671
## 4: 11     1    4    -0.8100650     -1.945750    -0.2590101 -3.1888321
## 5: 11     1    5    -0.8492300     -1.915809    -0.2799033 -4.0380621
## 6: 11     1    6    -0.9034648     -1.874347    -0.3088358 -4.9415269
##        SpeedY     SpeedZ   LocationX  LocationY  LocationZ
## 1:  -1.959984 -0.2490781  -0.7914472  -1.959984 -0.2490781
## 2:  -3.919968 -0.4981562  -2.3743415  -5.879951 -0.7472342
## 3:  -5.876568 -0.7495951  -4.7531086 -11.756519 -1.4968294
## 4:  -7.822319 -1.0086052  -7.9419407 -19.578838 -2.5054346
## 5:  -9.738128 -1.2885085 -11.9800028 -29.316966 -3.7939431
## 6: -11.612474 -1.5973444 -16.9215298 -40.929440 -5.3912875

Visualization

In this part, by sorting each class separately in the order of increasing ID values, the first set of data in the list will be chosen as an instance and plotted in 3D.

Instance for Class #1

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
table_class1 <- table_xyz[(Class == 1)]
head(table_class1)
##    id Class time AccelerationX AccelerationY AccelerationZ     SpeedX
## 1: 11     1    1    -0.7914472     -1.959984    -0.2490781 -0.7914472
## 2: 11     1    2    -0.7914472     -1.959984    -0.2490781 -1.5828944
## 3: 11     1    3    -0.7958727     -1.956600    -0.2514390 -2.3787671
## 4: 11     1    4    -0.8100650     -1.945750    -0.2590101 -3.1888321
## 5: 11     1    5    -0.8492300     -1.915809    -0.2799033 -4.0380621
## 6: 11     1    6    -0.9034648     -1.874347    -0.3088358 -4.9415269
##        SpeedY     SpeedZ   LocationX  LocationY  LocationZ
## 1:  -1.959984 -0.2490781  -0.7914472  -1.959984 -0.2490781
## 2:  -3.919968 -0.4981562  -2.3743415  -5.879951 -0.7472342
## 3:  -5.876568 -0.7495951  -4.7531086 -11.756519 -1.4968294
## 4:  -7.822319 -1.0086052  -7.9419407 -19.578838 -2.5054346
## 5:  -9.738128 -1.2885085 -11.9800028 -29.316966 -3.7939431
## 6: -11.612474 -1.5973444 -16.9215298 -40.929440 -5.3912875
x1 <- subset(table_xyz$LocationX, table_xyz$id == 11)
y1 <- subset(table_xyz$LocationY, table_xyz$id == 11)
z1 <- subset(table_xyz$LocationZ, table_xyz$id == 11)
fig1 <- plot_ly(x = x1, y = y1, z = z1, type = 'scatter3d', mode = 'lines',
        opacity = 1, line = list(width = 6, reverscale = FALSE))
fig1

Instance for Class #2

table_class2 <- table_xyz[(Class == 2)]
head(table_class2)
##    id Class time AccelerationX AccelerationY AccelerationZ   SpeedX   SpeedY
## 1: 15     2    1      1.199507      1.627288      1.951568 1.199506 1.627290
## 2: 15     2    2      1.199507      1.627288      1.951568 2.399013 3.254578
## 3: 15     2    3      1.233217      1.627288      1.939750 3.632230 4.881866
## 4: 15     2    4      1.325678      1.627288      1.907334 4.957909 6.509155
## 5: 15     2    5      1.363205      1.627288      1.894178 6.321114 8.136443
## 6: 15     2    6      1.378723      1.627288      1.888738 7.699837 9.763731
##       SpeedZ LocationX LocationY LocationZ
## 1:  1.951575  205944.3  -2441885  -1636043
## 2:  3.903143  205946.7  -2441882  -1636039
## 3:  5.842893  205950.3  -2441877  -1636033
## 4:  7.750227  205955.3  -2441871  -1636025
## 5:  9.644406  205961.6  -2441862  -1636016
## 6: 11.533144  205969.3  -2441853  -1636004
x2 <- subset(table_xyz$LocationX, table_xyz$id == 15)
y2 <- subset(table_xyz$LocationY, table_xyz$id == 15)
z2 <- subset(table_xyz$LocationZ, table_xyz$id == 15)
fig2 <- plot_ly(x = x2, y = y2, z = z2, type = 'scatter3d', mode = 'lines',
        opacity = 1, line = list(width = 6, reverscale = FALSE))
fig2

Instance for Class #3

table_class3 <- table_xyz[(Class == 3)]
head(table_class3)
##    id Class time AccelerationX AccelerationY AccelerationZ      SpeedX
## 1:  4     3    1   0.005184784     0.3740667     0.3094552 0.005184672
## 2:  4     3    2   0.005184784     0.3740667     0.3094552 0.010369456
## 3:  4     3    3   0.005184784     0.3740667     0.3094552 0.015554241
## 4:  4     3    4   0.005184784     0.3740667     0.3094552 0.020739025
## 5:  4     3    5   0.005184784     0.3740667     0.3094552 0.025923809
## 6:  4     3    6   0.005184784     0.3740667     0.3094552 0.031108594
##       SpeedY    SpeedZ LocationX LocationY LocationZ
## 1: 0.3740647 0.3094619  283445.8  -3191468  -1400778
## 2: 0.7481314 0.6189171  283445.8  -3191468  -1400777
## 3: 1.1221981 0.9283723  283445.8  -3191467  -1400776
## 4: 1.4962648 1.2378275  283445.8  -3191465  -1400775
## 5: 1.8703315 1.5472827  283445.9  -3191463  -1400774
## 6: 2.2443982 1.8567378  283445.9  -3191461  -1400772
x3 <- subset(table_xyz$LocationX, table_xyz$id == 4)
y3 <- subset(table_xyz$LocationY, table_xyz$id == 4)
z3 <- subset(table_xyz$LocationZ, table_xyz$id == 4)
fig3 <- plot_ly(x = x3, y = y3, z = z3, type = 'scatter3d', mode = 'lines',
        opacity = 1, line = list(width = 6, reverscale = FALSE))
fig3

Instance for Class #4

table_class4 <- table_xyz[(Class == 4)]
head(table_class4)
##    id Class time AccelerationX AccelerationY AccelerationZ   SpeedX     SpeedY
## 1:  5     4    1      1.286198    -0.3974369    -0.4660215 1.286207 -0.3974360
## 2:  5     4    2      1.286198    -0.3974369    -0.4660215 2.572404 -0.7948729
## 3:  5     4    3      1.286198    -0.3974369    -0.4660215 3.858602 -1.1923098
## 4:  5     4    4      1.286198    -0.3974369    -0.4660215 5.144800 -1.5897468
## 5:  5     4    5      1.286198    -0.3974369    -0.4660215 6.430998 -1.9871837
## 6:  5     4    6      1.286198    -0.3974369    -0.4660215 7.717196 -2.3846206
##        SpeedZ LocationX LocationY LocationZ
## 1: -0.4660289  -1972297  -3679816 -510722.7
## 2: -0.9320504  -1972295  -3679817 -510723.6
## 3: -1.3980719  -1972291  -3679818 -510725.0
## 4: -1.8640935  -1972285  -3679820 -510726.9
## 5: -2.3301150  -1972279  -3679822 -510729.2
## 6: -2.7961365  -1972271  -3679824 -510732.0
x4 <- subset(table_xyz$LocationX, table_xyz$id == 5)
y4 <- subset(table_xyz$LocationY, table_xyz$id == 5)
z4 <- subset(table_xyz$LocationZ, table_xyz$id == 5)
fig4 <- plot_ly(x = x4, y = y4, z = z4, type = 'scatter3d', mode = 'lines',
        opacity = 1, line = list(width = 6, reverscale = FALSE))
fig4

Instance for Class #5

table_class5 <- table_xyz[(Class == 5)]
head(table_class5)
##    id Class time AccelerationX AccelerationY AccelerationZ   SpeedX    SpeedY
## 1:  2     5    1      1.627311     0.6666239      1.786869 1.627327 0.6666244
## 2:  2     5    2      1.627311     0.6666239      1.786869 3.254638 1.3332483
## 3:  2     5    3      1.627311     0.6666239      1.786869 4.881949 1.9998721
## 4:  2     5    4      1.627311     0.6666239      1.786869 6.509260 2.6664960
## 5:  2     5    5      1.627311     0.6666239      1.786869 8.136571 3.3331198
## 6:  2     5    6      1.627311     0.6666239      1.786869 9.763882 3.9997437
##       SpeedZ LocationX LocationY LocationZ
## 1:  1.786858  446467.1  -2218692  -1138534
## 2:  3.573727  446470.4  -2218691  -1138530
## 3:  5.360596  446475.2  -2218689  -1138525
## 4:  7.147464  446481.8  -2218686  -1138517
## 5:  8.934333  446489.9  -2218683  -1138509
## 6: 10.721202  446499.7  -2218679  -1138498
x5 <- subset(table_xyz$LocationX, table_xyz$id == 2)
y5 <- subset(table_xyz$LocationY, table_xyz$id == 2)
z5 <- subset(table_xyz$LocationZ, table_xyz$id == 2)
fig5 <- plot_ly(x = x5, y = y5, z = z5, type = 'scatter3d', mode = 'lines',
        opacity = 1, line = list(width = 6, reverscale = FALSE))
fig5

Instance for Class #6

table_class6 <- table_xyz[(Class == 6)]
head(table_class6)
##    id Class time AccelerationX AccelerationY AccelerationZ     SpeedX
## 1:  1     6    1    -0.3042432     -2.119396     -1.528965 -0.3042271
## 2:  1     6    2    -0.3042432     -2.119396     -1.528965 -0.6084704
## 3:  1     6    3    -0.3042432     -2.119396     -1.528965 -0.9127136
## 4:  1     6    4    -0.3042432     -2.119396     -1.528965 -1.2169568
## 5:  1     6    5    -0.3042432     -2.119396     -1.528965 -1.5212000
## 6:  1     6    6    -0.3042432     -2.119396     -1.528965 -1.8254432
##        SpeedY    SpeedZ LocationX LocationY LocationZ
## 1:  -2.119381 -1.528970   1257061  695244.7   1696623
## 2:  -4.238777 -3.057936   1257061  695240.5   1696620
## 3:  -6.358173 -4.586901   1257060  695234.1   1696615
## 4:  -8.477568 -6.115866   1257059  695225.6   1696609
## 5: -10.596964 -7.644831   1257057  695215.0   1696601
## 6: -12.716360 -9.173796   1257055  695202.3   1696592
x6 <- subset(table_xyz$LocationX, table_xyz$id == 1)
y6 <- subset(table_xyz$LocationY, table_xyz$id == 1)
z6 <- subset(table_xyz$LocationZ, table_xyz$id == 1)
fig6 <- plot_ly(x = x6, y = y6, z = z6, type = 'scatter3d', mode = 'lines',
        opacity = 1, line = list(width = 6, reverscale = FALSE))
fig6

Instance for Class #7

table_class7 <- table_xyz[(Class == 7)]
head(table_class7)
##    id Class time AccelerationX AccelerationY AccelerationZ   SpeedX   SpeedY
## 1:  7     7    1      1.474328      1.259287      1.237417 1.474327 1.259284
## 2:  7     7    2      1.474328      1.259287      1.237417 2.948655 2.518571
## 3:  7     7    3      1.474328      1.259287      1.237417 4.422983 3.777858
## 4:  7     7    4      1.474328      1.259287      1.237417 5.897310 5.037145
## 5:  7     7    5      1.474328      1.259287      1.237417 7.371638 6.296431
## 6:  7     7    6      1.474328      1.259287      1.237417 8.845966 7.555718
##      SpeedZ LocationX LocationY LocationZ
## 1: 1.237391   1742839 -998424.2 -276864.5
## 2: 2.474809   1742842 -998421.7 -276862.1
## 3: 3.712226   1742846 -998417.9 -276858.3
## 4: 4.949643   1742852 -998412.8 -276853.4
## 5: 6.187060   1742859 -998406.5 -276847.2
## 6: 7.424478   1742868 -998399.0 -276839.8
x7 <- subset(table_xyz$LocationX, table_xyz$id == 7)
y7 <- subset(table_xyz$LocationY, table_xyz$id == 7)
z7 <- subset(table_xyz$LocationZ, table_xyz$id == 7)
fig7 <- plot_ly(x = x7, y = y7, z = z7, type = 'scatter3d', mode = 'lines',
        opacity = 1, line = list(width = 6, reverscale = FALSE))
fig7

Instance for Class #8

table_class8 <- table_xyz[(Class == 8)]
head(table_class8)
##    id Class time AccelerationX AccelerationY AccelerationZ     SpeedX    SpeedY
## 1:  6     8    1    -0.4792525      -1.08008     0.6564008 -0.4792638 -1.080080
## 2:  6     8    2    -0.4792525      -1.08008     0.6564008 -0.9585162 -2.160160
## 3:  6     8    3    -0.4792525      -1.08008     0.6564008 -1.4377687 -3.240241
## 4:  6     8    4    -0.4792525      -1.08008     0.6564008 -1.9170211 -4.320321
## 5:  6     8    5    -0.4792525      -1.08008     0.6564008 -2.3962736 -5.400402
## 6:  6     8    6    -0.4792525      -1.08008     0.6564008 -2.8755261 -6.480482
##       SpeedZ LocationX LocationY LocationZ
## 1: 0.6563666   1424117  -1149898 -326687.9
## 2: 1.3127674   1424116  -1149900 -326686.6
## 3: 1.9691682   1424115  -1149903 -326684.6
## 4: 2.6255690   1424113  -1149907 -326682.0
## 5: 3.2819698   1424111  -1149913 -326678.7
## 6: 3.9383707   1424108  -1149919 -326674.8
x8 <- subset(table_xyz$LocationX, table_xyz$id == 6)
y8 <- subset(table_xyz$LocationY, table_xyz$id == 6)
z8 <- subset(table_xyz$LocationZ, table_xyz$id == 6)
fig8 <- plot_ly(x = x8, y = y8, z = z8, type = 'scatter3d', mode = 'lines',
        opacity = 1, line = list(width = 6, reverscale = FALSE))
fig8

Time Series Representations

In both approaches, location data in the X axis (LocationX) is selected for investigation since there are three variables and representations can be shown using only one variable.

Sample Statistics

In this approach, sample statistics for LocationX data are calculated for each class separately.

library(BBmisc)
## 
## Attaching package: 'BBmisc'
## The following object is masked from 'package:base':
## 
##     isFALSE
# Mean is calculated

mean_class <- setDT(table_xyz)[, mean(LocationX), by = Class]
setnames(mean_class,'V1','Mean')

# Median is calculated

median_class <- setDT(table_xyz)[, median(LocationX), by = Class]
setnames(median_class,'V1','Median')

# Standard deviation is calculated

sd_class <- setDT(table_xyz)[, sd(LocationX), by = Class]
setnames(sd_class,'V1','StDev')

stats <- copy(mean_class)
stats[,Median:=median_class$Median]
stats[,StDev:=sd_class$StDev]
stats
##    Class      Mean    Median     StDev
## 1:     1  117111.4  134033.4  53796.64
## 2:     2  236503.2  236055.7  18726.29
## 3:     3 -826367.7 -806507.6 651878.43
## 4:     4 -751405.8 -747953.9 688787.49
## 5:     5  950337.8 1016160.6 244720.67
## 6:     6 1499107.4 1535415.3 189190.74
## 7:     7 1561317.9 1578033.2 106669.38
## 8:     8 1585863.7 1595699.6  88141.69

Median and mean values for each class are plotted below to see whether they differ from each other significantly.

ggplot(stats, aes(x=Class, y=Median, color=Class)) + geom_step()

ggplot(stats, aes(x=Class, y=Mean, color=Class)) + geom_step()

Piecewise Aggregate Approximation

In this approach, piecewise method is applied for each class separately. To be able to plot them together, segment lengths are calculated for each class separately so that index length is 40 for all classes.

library(TSrepr)
library(repr)

# PAA for class = 1

data_ts1=table_xyz[Class==1]$LocationX
paa1=repr_paa(data_ts1, 961, meanC)
paa <- data.table(paa1)

# PAA for class = 2

data_ts2=table_xyz[Class==2]$LocationX
paa2=repr_paa(data_ts2, 851, meanC)
paa[,paa2:=paa2]

# PAA for class = 3

data_ts3=table_xyz[Class==3]$LocationX
paa3=repr_paa(data_ts3, 835, meanC)
paa[,paa3:=paa3]

# PAA for class = 4

data_ts4=table_xyz[Class==4]$LocationX
paa4=repr_paa(data_ts4, 867, meanC)
paa[,paa4:=paa4]

# PAA for class = 5

data_ts5=table_xyz[Class==5]$LocationX
paa5=repr_paa(data_ts5, 1001, meanC)
paa[,paa5:=paa5]

# PAA for class = 6

data_ts6=table_xyz[Class==6]$LocationX
paa6=repr_paa(data_ts6, 876, meanC)
paa[,paa6:=paa6]

# PAA for class = 7

data_ts7=table_xyz[Class==7]$LocationX
paa7=repr_paa(data_ts7, 882, meanC)
paa[,paa7:=paa7]

# PAA for class = 8

data_ts8=table_xyz[Class==8]$LocationX
paa8=repr_paa(data_ts8, 788, meanC)
paa[,paa8:=paa8]
paa[,index:=1:.N]
head(paa)
##         paa1     paa2      paa3     paa4     paa5    paa6    paa7    paa8 index
## 1:  2256.491 210819.2 259645.61 -1939238 473062.5 1249821 1739992 1425727     1
## 2: 17247.827 212210.9 194839.51 -1871649 483405.0 1254094 1720354 1442248     2
## 3: 15206.197 213092.2 132305.17 -1801114 527704.1 1256074 1718221 1451153     3
## 4: 39295.900 215297.0  88066.26 -1741354 576372.5 1258265 1707239 1450668     4
## 5: 45533.185 211864.7  36358.79 -1681082 628118.3 1275467 1702599 1452429     5
## 6: 45801.326 211930.4 -29012.79 -1620872 626234.2 1275363 1708314 1473580     6

Then, the data table for PAA values is melted for plotting purposes. The long data table is used to plot all PAA values together below.

long_paa <- melt(paa, id.vars = "index")
head(long_paa) 
##    index variable     value
## 1:     1     paa1  2256.491
## 2:     2     paa1 17247.827
## 3:     3     paa1 15206.197
## 4:     4     paa1 39295.900
## 5:     5     paa1 45533.185
## 6:     6     paa1 45801.326
ggplot(long_paa, aes(index, value, col = variable)) + geom_line()

Conclusion

When the plots for both approaches are investigated, it is easier to differentiate the classes in the piecewise aggregate approximation method. In the sample statistics approach, when the data table is read, it is seen that the statistical values are different for each class. However, the plots are not clear enough to see any significant difference. Hence, piecewise aggregate approximation is chosen as the better representation.